* This file reproduces figures and tables from 
* Sanghoon Lee and Jeffrey Lin (2017),
* "Natural Amenities, Neighborhood Dyanamics, and Persistence in the Spatial
*  Distribution of Income"
* Review of Economic Studies, forthcoming. 
* URL: TKTK

* Please see READ_ME.pdf for citations and additional information. 

* First version: February 27, 2017 by JL
	
clear all
pause off
capture log close
set graphics on

*** Change directory to put results here
*** Note that this code automatically creates a sub-directory structure
global prjfolder /Users/rnjxl02/Dropbox/Documents/Work/neighborhoods/RESTUD_final_submission/Lee_Lin_replication_files 

cd "$prjfolder"		
global results = "$prjfolder/results" 
capture mkdir $results

*** Change these flags to turn off/on desired sections 
local summary      1	/* Tables 1, B1, B2 and result reported in Section 3.2 text */
local anchor       0	/* Table 2 */
local anchorrobust 0	/* Table 3 and result reported in Section 4.1 text */
local endog        0	/* Table 4 */
local anchorgraph  0	/* Figure 2, B3 */
local anchortime   0	/* Figure 3 */
local sharebyshore 0	/* Figure 4 */
local thresh       0	/* Figure B4 */
local stable       0	/* Tables 5, 6, and Figure 5, 6 */
local suburb       0	/* Table 7, Figure 7, results reported in Sections 4.1 and 6 text*/

use $prjfolder/data/Lee_Lin_data.dta, clear
capture gen insample = ma_tracts>1 & lndens!=. & dpri!=.
capture replace insample = ma_tracts>1 & lndens!=. & dpri!=.

* Summary statistics
if `summary' {
capture mkdir $results/text

* Figure 1: Dallas versus LA
log using $results/figure1/dallas_vs_losangeles.log, replace
gen absdpri = abs(dpri)
sum absdpri if mashort=="Dallas TX" & year==1970
sum absdpri if mashort=="Los Angeles CA" & year==1970
log close
export delimited geo2010 absdpri if year==1970 & (mashort=="Dallas TX" | mashort=="Los Angeles CA") using $results/figure1/figure1data.csv, replace quote
export delimited geo2010 absdpri if year==1970 & (mashort=="Dallas TX" | mashort=="Los Angeles CA") using $prjfolder/map/dat/figure1data.csv, replace quote

* Table 1: Number of consistent neighborhoods by census year 
capture mkdir $results/table1
log using $results/table1/table1.log, replace
table year if tr_pop>0 & pri!=., c(count pri sum mytag )	
log close

* Table B1: Summary statistics (part)
capture mkdir $results/appendix
log using $results/appendix/tableb1.log, replace
sum dpri
sum tr_pop if year==2010
sum arealandsqkm if year==2010
capture drop foo
gen foo = tr_pop/arealandsqkm
sum foo if year==1880
sum foo if year==1960
sum foo if year==2010
drop foo
sum d2port d2cbdkm if year==2010
sum tr_hu_avgage if year==2010
sum i_shore i_lake i_river i_slope i_temp i_pre  if year==2010 & tr_pop>0 & pri!=.
sum i_flood if year==2010 & tr_pop>0 & pri!=. & validflood
sum pri if geo=="36061013000" & year==2010 /* Upper east side NY */
sum pri if geo=="06037800506" & year==2010 /* Malibu LA CA */
log close

* Text: Regression of rank by rent on rank by income reported in Section 3.2
preserve
table year, c(count tr_hh_avginc count tr_hu_avgrnt count tr_hh_avgocs count tr_pop_shlit)
capture gen lnrnt = log(tr_hu_avgrnt)
capture gen lninc = log(tr_hh_avginc)
capture gen lnocs = log(tr_hh_avgocs)
bysort mayear: egen rnkrnt = rank(lnrnt) 
bysort mayear: egen rnkinc = rank(lninc)
bysort mayear: egen cntrnt = count(lnrnt)
bysort mayear: egen cntinc = count(lninc)
gen prirnt = rnkrnt/cntrnt
gen priinc = rnkinc/cntinc
log using $results/text/rent_v_income.log, replace
reg prirnt priinc, noc vce(cluster mayear)
tab year if e(sample) 
log close
restore

* Text: Reported correlations between coast and lake proximity and controls
log using $results/text/correlations.log, replace
corr d2shore d2port d2cbd lndens tr_hu_avgage if year==2010 & ma_shore==1
corr d2lake d2port d2cbd lndens tr_hu_avgage if year==2010
log close

* Figure B2: New York rankings, 1880, 2010
preserve
keep if mashort=="New York NY"
keep geo pri0 pri10-pri130 year
reshape long pri, i(geo year) j(dy)
sort year
gen yr = year+dy
drop if yr>2010 | yr<1880
sort geo yr
levelsof year, local(NYyears)
twoway ///
(line pri yr if geo=="36061013000" & year==1880, lcolor("255 000   0") lpattern(1) legend(region(lwidth(none)) label(1 "Upper East Side") span c(4) size(small))) ///
(line pri yr if geo=="36061002100" & year==1880, lcolor("000 255 170") lpattern(1) legend(label(2 "Tribeca"))) ///
(line pri yr if geo=="36061019200" & year==1880, lcolor("000 085 255") lpattern(1) legend(label(3 "East Harlem"))) ///
(line pri yr if geo=="36059409000" & year==1880, lcolor("170 085 000") lpattern(1) legend(label(4 "Levittown"))) ///
(connected pri yr if geo=="36061013000" & year==1960, m(o) msize(small) mcolor("255 000   0") lcolor("255 000   0") lpattern(dot) legend(label(5 ""))) ///
(connected pri yr if geo=="36061002100" & year==1960, m(o) msize(small) mcolor("000 255 170") lcolor("000 255 170") lpattern(dot) legend(label(6 ""))) ///
(connected pri yr if geo=="36061019200" & year==1960, m(o) msize(small) mcolor("000 085 255") lcolor("000 085 255") lpattern(dot) legend(label(7 ""))) ///
(connected pri yr if geo=="36059409000" & year==1960, m(o) msize(small) mcolor("170 085 000") lcolor("170 085 000") lpattern(dot) legend(label(8 "")) ///
xline(1960, lpattern(dash) lcolor(black) lwidth(vthin)) ///
title(, size(small))  ///
xscale(range(1880 2010)) xlabel(`NYyears', labsize(small)) ///
ysize(6.5) xsize(9) ///
xtitle(" " "Year", size(small)) ///
xlabel(,labsize(small)) ylabel(,format(%9.1f) labsize(small)) ///
ytitle("Percentile Rank" " ", size(small)))
graph export $results/appendix/figureb2.pdf, replace
graph export $results/appendix/figureb2.eps, replace
restore
}
*

* Section 4: Anchoring results 
if `anchor' {	
capture mkdir $results/table2
capture encode mayear, gen(mygroup)
capture xtset mygroup

capture log close
capture estimates drop _all

* Regressions
local var "shore"
local varlist "i_shore pri lndport lndens lndcbd lnhuage"
qui areg dpri i_shore pri if insample, absorb (mayear) vce(cluster mayear)
est sto `var'1
log using $results/table2/stats.log, replace
sum dpri if e(sample)
codebook cbsa if e(sample)
codebook year if e(sample)
log close
qui areg dpri i_shore pri lndport if insample, absorb (mayear) vce(cluster mayear)
est sto `var'2
qui areg dpri i_shore pri lndcbd if insample,  absorb (mayear) vce(cluster mayear)
est sto `var'3
qui areg dpri i_shore pri lndport lndens lnhuage lndcbd if insample, absorb (mayear) vce(cluster mayear)
est sto `var'4

capture drop foo
qui gen foo = i_shore & pri>=0.9
rename (i_shore foo) (foo i_shore)
qui areg dpri i_shore pri lndport lndens lnhuage lndcbd if insample, absorb (mayear) vce(cluster mayear)
est sto shore5
rename (i_shore foo) (foo i_shore)

capture drop foo
capture gen foo = 0
foreach nn in beach ocean coast shore beach cove bay lagoon {
replace foo = 1 if i_shore==1 & nm_`nn'==1
}
rename (i_shore foo) (foo i_shore)
qui areg dpri i_shore pri lndport lndens lnhuage lndcbd if insample, absorb (mayear) vce(cluster mayear)
est sto shore6
rename (i_shore foo) (foo i_shore)

local reglist "shore1 shore2 shore3 shore4 shore5 shore6"
* Table 2: Coastal proximity anchors neighborhoods to high incomes 
log using $results/table2/stats.log, append
codebook cbsa if insample & mytag
codebook cbsa if insample & mytag & lndport!=.
codebook cbsa if insample & mytag & lndens!=.
codebook cbsa if insample & mytag & lnhuage!=.
codebook cbsa if insample & mytag & lndcbd!=.
codebook cbsa  if insample & mytag & lndport!=. & lndens!=. & lnhuage!=. & lndcbd!=.
codebook geo2010 if insample
estimates table `reglist', b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust) keep(`varlist')
log close
foreach var in i_shore pri lndport lndens lndcbd lnhuage {
qui sum `var' if insample
local m`var' = trim("`: display %9.2f r(mean)'") 
local sd`var' = trim("`: display %9.2f r(sd)'")
}
estout `reglist' using $results/table2/table2.txt, replace ///
cells(b(fmt(3) star) se(fmt(3) par)) ///
starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tex) ///
stats(r2 N N_clust, fmt(%9.3f %9.0gc %9.0gc) labels("\$R^2\$" "Neighborhoods")) ///
keep(`varlist') wrap varwidth(23) ///
varlabels(pri "[1em]`=char(13)'Initial \%ile rank by income \$(r_{i,t})\$" ///
i_shore "[1em]`=char(13)'1(Coast)\$^{*,\dag,\ddagger}\$" ///
lndens "[1em]`=char(13)'Log population density" ///
lndcbd "[1em]`=char(13)'Log distance to city center" ///
lnhuage "[1em]`=char(13)'Log average house age" ///
lndport "[1em]`=char(13)'Log distance to nearest seaport\$^{\mathsection}\$") ///
labcol2("`mi_shore' [`sdi_shore']" "`mpri' [`sdpri']" "`mlndport' [`sdlndport']" "`mlndens' [`sdlndens']"  "`mlndcbd' [`sdlndcbd']" "`mlnhuage' [`sdlnhuage']" , title("\$\mu\$ [\$\sigma\$]")) ///
numbers ///
prehead(\begin{tabular}{l*{@M}{c}c}  \hline\hline\\) ///
postfoot([1em]  \hline\hline \end{tabular})

estout `reglist' using $results/table2/table2.tab, replace ///
cells(b(fmt(3) star) se(fmt(3) par)) ///
starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tab) ///
stats(r2 N N_clust, fmt(%9.3f %9.0gc %9.0gc) labels("\$R^2\$" "Neighborhoods")) ///
keep(`varlist') wrap varwidth(23) ///
varlabels(pri "[1em]`=char(13)'Initial \%ile rank by income \$(r_{i,t})\$" ///
i_shore "[1em]`=char(13)'1(Coast)\$^{*,\dag,\ddagger}\$" ///
lndens "[1em]`=char(13)'Log population density" ///
lndcbd "[1em]`=char(13)'Log distance to city center" ///
lnhuage "[1em]`=char(13)'Log average house age" ///
lndport "[1em]`=char(13)'Log distance to nearest seaport\$^{\mathsection}\$") ///
labcol2("`mi_shore' [`sdi_shore']" "`mpri' [`sdpri']" "`mlndport' [`sdlndport']" "`mlndens' [`sdlndens']"  "`mlndcbd' [`sdlndcbd']" "`mlnhuage' [`sdlnhuage']" , title("\$\mu\$ [\$\sigma\$]")) ///
numbers 
}
*

* Figure 2 and Figure B3
if `anchorgraph' {
capture mkdir $results/figure2
* Compute nominal changes in income 
capture drop lnahi-ilnahi
sort geo2010 year
* drop outliers
replace tr_hh_avginc = . if tr_hh_avginc<300
* take logs and compute changes
gen lnahi = log(tr_hh_avginc)
gen nlnahi = lnahi[_n+1] if geo2010 == geo2010[_n+1]
gen dlnahi = nlnahi-lnahi
* de-mean by metropolitan area
bysort mayear: egen mlnahi = mean(lnahi)
bysort mayear: egen sdlnahi = sd(lnahi)
gen alnahi = abs(lnahi)
bysort mayear: egen mdlnahi = mean(dlnahi)
bysort mayear: egen malnahi = max(alnahi)
gen dmdlnahi = dlnahi -  mdlnahi
gen ilnahi = (lnahi - mlnahi)/malnahi

capture drop lpx*
capture drop lpy* 
capture drop lpse* 
capture drop lpi*
capture drop uci lci 
lpoly dpri pri if insample, ci noscatter gen(lpx lpy) se(lpse) nograph
gen uci = lpy + 1.96*lpse
gen lci = lpy - 1.96*lpse
lpoly dmdlnahi pri, ci noscatter gen(lpiy) at(lpx) se(lpise) nograph 
gen uci2 = lpiy + 1.96*lpise
gen lci2 = lpiy - 1.96*lpise
twoway ///
	(line lpy lpx, yaxis(1) lcolor(black) lpattern(solid) ///
	yline(0, lpattern(dot)) xline(0.5, lpattern(dot)) ///
	ytitle("Change in percentile rank" " ", size(small)) ///
	xtitle(" " "Initial percentile rank", size(small)) ///
	ylabel(#5,format(%9.1f) labsize(small)) ///
	xlabel(0 .25 .5 .75 1,format(%9.2f) labsize(small)) ///
	yscale(r(-.125 .125)) ///
	legend(rows(3) order(1 4 6) region(lcolor(white)) ///
	label(1 "Change in percentile rank (left scale)") ///
	label(4 "Log change in average household income (right scale)") ///
	label(6 "95% confidence interval") size(small) ring(0) pos(8)) ///
	) ///
	(line uci lpx, yaxis(1) lp(shortdash)  lcolor(gs12)) (line lci lpx, yaxis(1) lp(shortdash)  lcolor(gs12)) ///
	(line lpiy lpx, lp(dash_dot) yaxis(2) lcolor(black) ///
	ytitle(" " "Log change in average household income",axis(2) size(small)) ///
	ylabel(-.06(.02).06, format(%9.2f) labsize(small) axis(2)) yscale(r(-.06 .06) axis(2))) ///
	(line uci2 lpx, yaxis(2) lp(shortdash) lcolor(gs12)) (line lci2 lpx, yaxis(2) lp(shortdash) lcolor(gs12)) 
graph export $results/appendix/figureb3.pdf, replace
graph export $results/appendix/figureb3.eps, replace
capture drop lpx*
capture drop lpy* 
capture drop lpse* 
capture drop lpi*
capture drop uci lci 


foreach var in ahat {
capture drop supx supy infy
lpoly dpri pri if i_`var'==1, adoonly noscatter nograph gen(supx supy)
lpoly dpri pri if i_`var'==0, adoonly noscatter nograph gen(infy) at(supx)
twoway (line supy supx, lcolor(black) lpattern(solid) ///
legend(region(lcolor(white)) label(1 "Superior natural amenity neighborhoods") label(2 "Other neighborhoods") size(small) ring(0) pos(6)) ///
yline(0, lpattern(dot)) xline(0.5, lpattern(dot)) ///
ytitle("Change in percentile rank" " ", size(small) ) ///
xtitle(" " "Initial percentile rank by average household income", size(small) ) ///
ylabel(,format(%9.2f) labsize(small)) ///
xlabel(,format(%9.2f) labsize(small)) ///
) ///
(line infy supx, lcolor(gs8) lpattern(dash) ///
)
graph export "$results/figure2/figure2.pdf", replace
graph export "$results/figure2/figure2.eps", replace
capture drop supx supy infy
}
}
*

* Table 3
if `anchorrobust' {
capture mkdir $results/table3
log using $results/table3/stats.log, replace
log close
foreach var in shore lake river slope temp pre flood ahat {
    capture drop inatfeat
	gen byte inatfeat = i_`var'
	lab var inatfeat "Indicator for natural amenity"
	lab var pri "Initial percentile rank"
	qui areg dpri inatfeat pri if insample, vce(cluster mayear) absorb(mayear)
		est sto `var'1
log using $results/table3/stats.log, append
	sum inatfeat if e(sample)
	codebook geo2010 if e(sample)
	codebook mashort if e(sample)
log close		
	qui areg dpri inatfeat pri lndport lndens lnhuage lndcbd if insample, vce(cluster mayear) absorb(mayear)
		est sto `var'2
	capture replace inatfeat = i_`var' & pri>=0.9
 	qui areg dpri inatfeat pri lndport lndens lnhuage lndcbd if insample, absorb(mayear) vce(cluster cbsa)
		est sto `var'3
	drop inatfeat
}
*
capture program drop nmreg
program define nmreg
args natfeat names
capture drop inatfeat
gen byte inatfeat = 0
foreach nn in `names' {
replace inatfeat = 1 if i_`natfeat'==1 & nm_`nn'==1
}
*
log using $results/text/condition_on_names.log, append
di "`natfeat'"
sum pri if inatfeat
sum pri if i_`natfeat'
log close
qui areg dpri inatfeat pri lndport lndens lnhuage lndcbd if insample, absorb (mayear) vce(cluster mayear)
est sto `natfeat'4
end

log using $results/text/condition_on_names.log, replace
log close

nmreg shore	"beach ocean coast shore beach cove bay lagoon"
nmreg lake	"bay beach coast harbor island lagoon lake pond port shore water"
nmreg river "beach bend brook creek fall fork harbor island port rapid river run spring stream water"
nmreg slope	"bluff butte canyon cliff height high hill knoll mount ridge summit terrace vale valley"
nmreg pre	"bay beach cape coast cove island gulf lagoon lake ocean sea shore bluff butte cliff height high hill knoll mount ridge summit terrace"
nmreg flood "bluff butte cliff height high hill knoll mount ridge summit terrace"
nmreg ahat	"bay beach bluff brook butte cape cliff coast cove creek fair fall haven height high hill island knoll lagoon mount ocean pond poplar rapid river sea shore spring stream summit terrace"

local varlist "inatfeat"


estimates table shore1 lake1 river1 slope1  pre1 flood1 ahat1, b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust) keep(inatfeat)
estimates table shore2 lake2 river2 slope2  pre2 flood2 ahat2, b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust) keep(inatfeat)
estimates table shore3 lake3 river3 slope3  pre3 flood3 ahat3, b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust) keep(inatfeat)
estimates table shore4 lake4 river4 slope4  pre4 flood4 ahat4, b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust j jp widstat) keep(inatfeat)


	estout shore1 lake1 river1 slope1 pre1 flood1 ahat1 using $results/table3/table3.txt, replace ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tex) ///
	keep(inatfeat) 
	
	estout shore2 lake2 river2 slope2 pre2 flood2 ahat2 using $results/table3/table3.txt, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tex) ///
	keep(inatfeat) 

	estout shore3 lake3 river3 slope3 pre3 flood3 ahat3 using $results/table3/table3.txt, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tex) ///
	keep(inatfeat) 
	
	estout shore4 lake4 river4 slope4 pre4 flood4 ahat4 using $results/table3/table3.txt, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tex) ///
	keep(inatfeat) 
	
	
	estout shore1 lake1 river1 slope1 pre1 flood1 ahat1 using $results/table3/table3.tab, replace ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tab) ///
	keep(inatfeat) 
	
	estout shore2 lake2 river2 slope2 pre2 flood2 ahat2 using $results/table3/table3.tab, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tab) ///
	keep(inatfeat) 

	estout shore3 lake3 river3 slope3 pre3 flood3 ahat3 using $results/table3/table3.tab, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tab) ///
	keep(inatfeat) 
	
	estout shore4 lake4 river4 slope4 pre4 flood4 ahat4 using $results/table3/table3.tab, append ///
	label wrap ///
	cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tab) ///
	keep(inatfeat) 

}
*

* Figure 4
if `sharebyshore' {
	preserve
	capture mkdir $results/figure4/
	
	estimates drop _all
	capture log close

	bysort year: egen mean_shore = mean(i_shore)
	bysort year: egen mean_tshore = mean(t_shore)
	gen ratio = mean_tshore/mean_shore
	egen ytag = tag(year)
	
	levelsof year if ytag, local(yearlevels)
	twoway (line ratio year if ytag, lpattern(solid) lcolor(gs0) m(oh) ///
				xsize(6.5) ysize(3) xscale(r(1880 2010)) ///
				xlabel(`yearlevels',labsize()) ylabel(#6,format(%9.2f) labsize()) ///
				xtitle(" " "Year", size()) ///
				ytitle("Relative likelihood that top-decile neighborhoods" "are within 500m of ocean or Great Lake" " ", size()) ///
	       )
	graph export $results/figure4/figure4.pdf, replace	
	graph export $results/figure4/figure4.eps, replace	
	restore
}
*

* Table 4
if `endog' {
capture mkdir $results/table4

qui areg dpri i_shore pri                                               if lndens!=. & lnhuage!=. & lndcbd!=. & year==2000, absorb(mayear) vce(cluster mayear)
est sto end1
qui areg dpri i_shore pri lndport lndcbd lndens lnhuage                 if lndens!=. & lnhuage!=. & year==2000, absorb(mayear) vce(cluster mayear)
est sto end2
qui areg dpri i_shore pri lndport lndcbd lndens lnhuage sh_hu39         if lndens!=. & lnhuage!=. & year==2000, absorb(mayear) vce(cluster mayear)
est sto end3

egen foogroup = group(mayear id)
qui areg dpri i_shore pri lndport lndcbd lndens lnhuage sh_hu39         if WRLURI!=. & lndens!=. & lnhuage!=. & year==2000, absorb(mayear) vce(cluster foogroup)
est sto end4
qui areg dpri i_shore pri lndport lndcbd lndens lnhuage sh_hu39 WRLURI if lndens!=. & lnhuage!=. & year==2000, absorb(mayear) vce(cluster foogroup)
est sto end5

local reglist "end1 end2 end3 end4 end5"
local varlist "i_shore pri lndport lndcbd lndens lnhuage sh_hu39 WRLURI"

foreach var in i_shore pri lndport lndens lnhuage lndcbd sh_hu39 WRLURI {
qui sum `var' if year==2000 & dpri
local m`var' = trim("`: display %9.2f r(mean)'") 
local sd`var' = trim("`: display %9.2f r(sd)'")
}
sum dpri
estout  `reglist' using $results/table4/table4.txt, replace ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tex) stats(r2 N estimator, fmt(%9.3f %9.0gc) labels("\$R^2\$" "Neighborhoods" "Estimator")) ///
keep(`varlist') wrap varwidth(21) ///
varlabels(pri "Initial \%ile rank by income \$(r_{i,t})\$" ///
i_shore "[0.5em]`=char(13)'1(Coast within 500m)" ///
pri "[0.5em]`=char(13)'Initial \%ile rank by income (\$r_{i,t}\$)" ///
lndens "[0.5em]`=char(13)'Log population density" ///
lndcbd "[0.5em]`=char(13)'Log distance to city center" ///
lnhuage "[0.5em]`=char(13)'Log average house age\$^*\$" ///
lndport "[0.5em]`=char(13)'Log distance to seaport" ///
sh_hu39 "[0.5em]`=char(13)'Share houses built before 1940\$^*\$" ///
WRLURI   "[0.5em]`=char(13)'Wharton res. land- use reg. index") ///
labcol2("`mi_shore' [`sdi_shore']" "`mpri' [`sdpri']" "`mlndport' [`sdlndport']" "`mlndcbd' [`sdlndcbd']" "`mlndens' [`sdlndens']"   "`mlnhuage' [`sdlnhuage']" "`msh_hu39' [`sdsh_hu39']" "`mWRLURI' [`sdWRLURI']" , title("\$\mu\$ [\$\sigma\$]")) ///
numbers   ///
prehead(\begin{tabular}{l*{@M}{c}c}  \hline\hline\\) ///
postfoot([1em]  \hline\hline \end{tabular})

estout  `reglist' using $results/table4/table4.tab, replace ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tab) stats(r2 N estimator, fmt(%9.3f %9.0gc) labels("\$R^2\$" "Neighborhoods" "Estimator")) ///
keep(`varlist') wrap varwidth(21) ///
varlabels(pri "Initial \%ile rank by income \$(r_{i,t})\$" ///
i_shore "[0.5em]`=char(13)'1(Coast within 500m)" ///
pri "[0.5em]`=char(13)'Initial \%ile rank by income (\$r_{i,t}\$)" ///
lndens "[0.5em]`=char(13)'Log population density" ///
lndcbd "[0.5em]`=char(13)'Log distance to city center" ///
lnhuage "[0.5em]`=char(13)'Log average house age\$^*\$" ///
lndport "[0.5em]`=char(13)'Log distance to seaport" ///
sh_hu39 "[0.5em]`=char(13)'Share houses built before 1940\$^*\$" ///
WRLURI   "[0.5em]`=char(13)'Wharton res. land- use reg. index") ///
labcol2("`mi_shore' [`sdi_shore']" "`mpri' [`sdpri']" "`mlndport' [`sdlndport']" "`mlndcbd' [`sdlndcbd']" "`mlndens' [`sdlndens']"   "`mlnhuage' [`sdlnhuage']" "`msh_hu39' [`sdsh_hu39']" "`mWRLURI' [`sdWRLURI']" , title("\$\mu\$ [\$\sigma\$]")) ///
numbers 
}
*

* Figure B4
if `thresh' {
preserve
if `thresh' {
capture mkdir $results/appendix/figureb4
	* Graphs
		* Define program. the program runs the regressions 
		* from result 1 for varying proximity cutoffs and
		* stores the results in matrices. Program also
		* tests whether intially top-ranked tracts perform
		* better, as per theory. 
		capture program drop robustdist
		program define robustdist
			args feat dir maxdist pricut var	/* Feature; Direction of ineq; Proximity cutoff; Top-rank cutoff */
				capture drop x1
				capture drop x2
			gen x1 = `feat' `dir' `maxdist'  		/* adjust distance for proximity cutoff */
			gen x2 =  x1==1 & pri >= `pricut' 	/* adjust initial rank for top-rank cutoff */
			qui areg dpri x1 pri lndport lndens lnhuage lndcbd, vce(cluster mayear) abs(mayear)
				mat tempb1=e(b)
				mat tempV1=e(V)
			qui areg dpri x2 pri lndport lndens lnhuage lndcbd, vce(cluster mayear) abs(mayear)
				mat tempb2=e(b)
				mat tempV2=e(V)
			mat outcome = (`pricut', `maxdist', tempb1[1,1], tempV1[1,1]^0.5, tempb2[1,1], tempV2[1,1]^0.5)
			mat `feat'_stack = `feat'_stack\outcome
			drop x1 x2
		end
			
		* Run the program.
		set matsize 11000
			local feat = "avgslope"
			local var = "slope"
				mat `feat'_stack = (0,0,0,0,0,0)
				mat colnames `feat'_stack = pricut maxdist b1 std_b1 b2 std_b2
				forvalues maxdist = 5 (1) 25 {
					di "`feat', `maxdist'"
					robustdist `feat' ">=" `maxdist' 0.9 `var'
					robustdist `feat' ">=" `maxdist' 0 `var'
				}
			foreach var in shore river lake {
				local feat = "d2`var'"
				mat `feat'_stack = (0,0,0,0,0,0)
				mat colnames `feat'_stack = pricut maxdist b1 std_b1 b2 std_b2
				forvalues maxdist = 100 (100) 10000 {
					di "`feat', `maxdist'"
					robustdist `feat' "<=" `maxdist' 0.9 `var'
					robustdist `feat' "<=" `maxdist' 0 `var'
				}
			}

		* Convert outcome matrices to Stata datasets and save them.
		foreach feat in avgslope d2shore d2river d2lake {
			clear
			svmat `feat'_stack, names(col)
			drop if b1 == 0 & std_b1 ==0
			save "$results/appendix/figureb4/`feat'_stack.dta", replace
			}
}		
		
		* Draw figures.
			* First, define program
		capture program drop threshgraph 
		program define threshgraph
			args feat xmin maxdist xtitle title xl legendopt
			
			preserve
			
			local legend="legend(off)"
			local one="Indicator for natural amenity"
			local two=`""Indicator for natural amenity" "and initial rank in top decile""'
			if "`legendopt'"!="" {
				local legend=`"legend(region(lstyle(none)) symxsize(*.5) position(2) order(3 6) ring(0) rows(2)) legend(label(3 `one') size(vsmall) label(6 `two'))"'
			}
			
			use $results/appendix/figureb4/`feat'_stack.dta, clear
			keep if maxdist < `maxdist'
			
			foreach v in b1 b2 {
			gen `v'uci = `v' + 1.96*std_`v'
			gen `v'lci = `v' - 1.96*std_`v'
			}
			
			qui sum b1lci
				local b1min = r(min)
			qui sum b1uci
				local b1max = r(max)
			qui sum b2lci
				local b2min = r(min)
			qui sum b2uci
				local b2max = r(max)
				
			local ymin = min(`b1min', `b2min', 0)
			local ymax = max(`b2max', `b1max')
			local xmax = `maxdist'*1.05
	
			local pricut = 0.9
			local pricut00 = round(`pricut'*100)
			tempvar indicator 
			gen foo = round(pricut,0.1) == round(`pricut',0.1)
			twoway ///
			(line b1uci maxdist, lwidth(thin) lcolor(gs11) lpattern(dash)) ///
			(line b1lci maxdist, lwidth(thin) lcolor(gs11) lpattern(dash)) ///
			(line b1 maxdist, lwidth(medthin) lcolor(gs11)) ///
			(line b2uci maxdist if foo, lwidth(thin) lcolor(gs0) lpattern(dash)) ///
			(line b2lci maxdist if foo, lwidth(thin) lcolor(gs0) lpattern(dash)) ///
			(line b2 maxdist if foo, lwidth(medthin) lcolor(gs0) ///
				xline(`xl', lpattern(.)) ///
				yline(0, lpattern(.)) ///
				xsize(4) ysize(3) `legend' ///
				xlabel(,format(%9.0fc) labsize(vsmall)) ylabel(#6,format(%9.2f) labsize(vsmall)) ///
				yscale(r(`ymin' `ymax')) ///
				xscale(r(`xmin' `xmax')) ///
				xtitle(" " "Indicator variable used:" "`xtitle'", size(vsmall)) ///
				title("`title'" " ", size(small)) ///
				)
			graph save "$results/appendix/figureb4/`feat'_`pricut00'.gph", replace
			drop foo
			restore
		end
		
		threshgraph d2shore 0 5000 "Neighborhood is within X meters of ocean or Great Lake" "Ocean or Great Lake"  500 legend
		threshgraph d2lake 0  5000 "Neighborhood is within X meters of lake" "Lake" 500
		threshgraph d2river 0 3000 "Neighborhood is within X meters of river" "River" 500
		threshgraph avgslope 5 20 "Neighborhood has average slope at least X degrees" "Hills" 15 
					
		graph combine "$results/appendix/figureb4/d2shore_90.gph" ///
				      "$results/appendix/figureb4/d2lake_90.gph" ///
				      "$results/appendix/figureb4/d2river_90.gph" ///
				      "$results/appendix/figureb4/avgslope_90.gph", ///
				      imargin(0.1 0.1 0.3 0.1) ///
				      ycommon xsize(6.5) ysize(7.5) l1title("Effect on change in percentile rank" " ", size(vsmall))
		graph export "$results/appendix/figureb4/figureb4.pdf", replace
		graph export "$results/appendix/figureb4/figureb4.eps", replace
restore
}
*

* Figure 3
if `anchortime' {
capture mkdir $results/figure3

preserve

* Calculate indicator for metro observed in 1880
capture drop ma_1880 d1880
gen d1880 = year==1880 & pri!=.
bysort cbsa: egen ma_1880 = max(d1880)
levelsof mashort if ma_1880==1 & ma_shore, local(malevels)

* Calculate long-run changes in log rank
capture drop dpri10-dpri130
forvalues dd = 10(10)130 {
gen dpri`dd' = pri`dd'- pri
}

* Regressions
foreach feat in shore slope ahat {
	foreach year in 1880 1910 1920 1930 1940 1950 1960 1970 1980 1990 2000 {
		preserve
		mat drop _all
		mat stack = (`year',0,0,0)
		mat colnames stack = year dy b1 std_b1		
		forvalues dy = 10(10)130 {
			capture estimates clear
			capture areg dpri`dy' i_`feat' pri lndport lndens lndcbd if year==`year', absorb(mayear) vce(cluster cbsa)
				capture mat tempb1=e(b)
				capture mat tempV1=e(V)
			capture mat outcome = (`year', `dy', tempb1[1,1], tempV1[1,1]^0.5)
			capture mat stack = stack\outcome
		}
		clear
		svmat stack, names(col)
		save $results/figure3/`feat'_stack`year'.dta, replace
		restore
	}
}
foreach feat in shore slope ahat {
	use $results/figure3/`feat'_stack1880.dta, clear
	foreach year in 1910 1920 1930 1940 1950 1960 1970 1980 1990 2000 {
		append using $results/figure3/`feat'_stack`year'.dta
	}
	save $results/figure3/`feat'_stack_all.dta, replace
}


* Figure 3: Anchoring: Changes over time in 10-year e↵ects and long-run effects
local feat = "shore"
	use $results/figure3/`feat'_stack_all.dta, clear
	drop if dy>0 & std_b1==0
	gen y = year + dy
	drop if y>2010
	drop if y == 1920
	drop if year == 1920
	drop if year == 1880 & (inrange(dy,9,41))
	
	gen lci = b1-1.96*std_b1
	gen uci = b1+1.96*std_b1
	
	twoway (line b1 y if dy==10, lcolor(gs12) lwidth(vvvthick)) ///
		(scatter b1 y if b1-1.96*std_b1>0 | b1+1.96*std_b1<0,  msize(vlarge) mcolor(gs0) m(Oh)) ///
		   (connected b1 y if year==1880, msize(small) mcolor(gs2) lwidth(thin) lcolor(gs2)) ///
		   (scatter b1 y if year==1880 & dy==0, msize(large) mcolor(gs2)) ///
		   (connected b1 y if year==1930, msize(small) mcolor(cranberry) lwidth(thin) lcolor(cranberry)) ///
		   (scatter b1 y if year==1930 & dy==0, msize(large) mcolor(cranberry)) ///
		   (connected b1 y if year==1940, msize(small) mcolor(orange_red) lwidth(thin) lcolor(orange_red)) ///
		   (scatter b1 y if year==1940 & dy==0, msize(large) mcolor(orange_red)) ///
		   (connected b1 y if year==1950, msize(small) mcolor(emerald) lwidth(thin) lcolor(emerald)) ///
		   (scatter b1 y if year==1950 & dy==0, msize(large) mcolor(emerald)) ///
		   (connected b1 y if year==1960, msize(small) mcolor(midgreen) lwidth(thin) lcolor(midgreen)) ///
		   (scatter b1 y if year==1960 & dy==0, msize(large) mcolor(midgreen)) ///
		   (connected b1 y if year==1970, msize(small) mcolor(forest_green) lwidth(thin) lcolor(forest_green)) ///
		   (scatter b1 y if year==1970 & dy==0, msize(large) mcolor(forest_green)) ///
		   (connected b1 y if year==1980, msize(small) mcolor(midblue) lwidth(thin) lcolor(midblue)) ///
		   (scatter b1 y if year==1980 & dy==0, msize(large) mcolor(midblue)) ///
		   (connected b1 y if year==1990, msize(small) mcolor(blue) lwidth(thin) lcolor(blue)) ///
		   (scatter b1 y if year==1990 & dy==0, msize(large) mcolor(blue)) ///
		   (scatter b1 y if year==2000 & dy==0, msize(large) mcolor(purple)) ///
		   (connected b1 y if year==2000, msize(small) mcolor(purple) lwidth(thin) lcolor(purple) ///
		   legend(c(1) region(lcolor(white)) rowgap(*2) nobox order(1 3 5 2) label(1 "Change over 10 years") label(3 "Change since 1880") label(5 "Change since 1930, etc.") label(2 "p < 0.05")  pos(11) ring(0)) ///
		   yline(0, lpattern(dot)) ///
		   xsize(6.5) ysize(2) ///
		   ylabel(,format(%9.2f) labsize(large)) xlabel(1880 1930 1940 1950 1960 1970 1980 1990 2000 2010, labsize(large)) ///
		   xtitle("", size(large)) ///
		   ytitle("Effect of natural amenity" "on change in neighborhood income" " ", size(large)) ///
		   ) 
	 graph export $results/figure3/overtime_`feat'.pdf, replace
	 graph export $results/figure3/overtime_`feat'.eps, replace
	 
foreach feat in  slope ahat {
	use $results/figure3/`feat'_stack_all.dta, clear
	drop if dy>0 & std_b1==0
	gen y = year + dy
	drop if y>2010
	drop if y == 1920
	drop if year == 1920
	drop if year == 1880 & (inrange(dy,9,41))
	*drop if year<1940
	
	gen lci = b1-1.96*std_b1
	gen uci = b1+1.96*std_b1
	
	twoway (line b1 y if dy==10, lcolor(gs12) lwidth(vvvthick)) ///
		(scatter b1 y if b1-1.96*std_b1>0 | b1+1.96*std_b1<0,  msize(vlarge) mcolor(gs0) m(Oh)) ///
		   (connected b1 y if year==1880, msize(small) mcolor(gs2) lwidth(thin) lcolor(gs2)) ///
		   (scatter b1 y if year==1880 & dy==0, msize(large) mcolor(gs2)) ///
		   (connected b1 y if year==1930, msize(small) mcolor(cranberry) lwidth(thin) lcolor(cranberry)) ///
		   (scatter b1 y if year==1930 & dy==0, msize(large) mcolor(cranberry)) ///
		   (connected b1 y if year==1940, msize(small) mcolor(orange_red) lwidth(thin) lcolor(orange_red)) ///
		   (scatter b1 y if year==1940 & dy==0, msize(large) mcolor(orange_red)) ///
		   (connected b1 y if year==1950, msize(small) mcolor(emerald) lwidth(thin) lcolor(emerald)) ///
		   (scatter b1 y if year==1950 & dy==0, msize(large) mcolor(emerald)) ///
		   (connected b1 y if year==1960, msize(small) mcolor(midgreen) lwidth(thin) lcolor(midgreen)) ///
		   (scatter b1 y if year==1960 & dy==0, msize(large) mcolor(midgreen)) ///
		   (connected b1 y if year==1970, msize(small) mcolor(forest_green) lwidth(thin) lcolor(forest_green)) ///
		   (scatter b1 y if year==1970 & dy==0, msize(large) mcolor(forest_green)) ///
		   (connected b1 y if year==1980, msize(small) mcolor(midblue) lwidth(thin) lcolor(midblue)) ///
		   (scatter b1 y if year==1980 & dy==0, msize(large) mcolor(midblue)) ///
		   (connected b1 y if year==1990, msize(small) mcolor(blue) lwidth(thin) lcolor(blue)) ///
		   (scatter b1 y if year==1990 & dy==0, msize(large) mcolor(blue)) ///
		   (scatter b1 y if year==2000 & dy==0, msize(large) mcolor(purple)) ///
		   (connected b1 y if year==2000, msize(small) mcolor(purple) lwidth(thin) lcolor(purple) ///
		   legend(off) yline(0, lpattern(dot)) ///
		   xsize(6.5) ysize(2) ///
		   ylabel(,format(%9.2f) labsize(large)) xlabel(1880 1930 1940 1950 1960 1970 1980 1990 2000 2010, labsize(large)) ///
		   xtitle("", size(large)) ///
		   ytitle("Effect of natural amenity" "on change in neighborhood income" " ", size(large)) ///
		   ) 
	 graph export $results/figure3/overtime_`feat'.pdf, replace
	 graph export $results/figure3/overtime_`feat'.eps, replace
	 }
restore
}

*

* Section 5: Persistence results
if `stable' {
capture mkdir $results/table5
capture mkdir $results/table6
capture mkdir $results/figure5
capture mkdir $results/figure6

* COMPUTE VARIANCE IN FORWARD TRACT RANK, YYYY-2010
* For balanced panel of cities
forvalues cc=0(10)130 {
	gen zpri`cc' = pri`cc'
	replace zpri`cc' = . if zpri`cc'==0
}
order zpri0 zpri10 zpri20 zpri30 zpri40 zpri50 zpri60 zpri70 zpri80 zpri90 zpri100 zpri110 zpri120 zpri130
* First select forward years by census year
* 1880 
egen sdrank1880 = rowsd(zpri0  zpri80 zpri90 zpri100 zpri110 zpri120 zpri130) if year==1880
replace sdrank1880 = cond(mi(zpri0, zpri80, zpri90, zpri100, zpri110, zpri120, zpri130), ., sdrank1880) if year==1880 
* 1930
egen sdrank1930 = rowsd(zpri0 zpri10 zpri20 zpri30 zpri40 zpri50 zpri60 zpri70 zpri80) if year==1930
replace sdrank1930 = cond(mi(zpri0, zpri10, zpri20, zpri30, zpri40, zpri50, zpri60, zpri70, zpri80), ., sdrank1930) if year==1930
* 1940
egen sdrank1940 = rowsd(zpri0 zpri10 zpri20 zpri30 zpri40 zpri50 zpri60 zpri70) if year==1940
replace sdrank1940 = cond(mi(zpri0, zpri10, zpri20, zpri30, zpri40, zpri50, zpri60, zpri70), ., sdrank1940) if year==1940
* 1950
local y 1950
egen sdrank`y' = rowsd(zpri0 zpri10 zpri20 zpri30 zpri40 zpri50 zpri60) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10, zpri20, zpri30, zpri40, zpri50, zpri60), ., sdrank`y') if year==`y'
* 1960
local y 1960
egen sdrank`y' = rowsd(zpri0 zpri10 zpri20 zpri30 zpri40 zpri50) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10, zpri20, zpri30, zpri40, zpri50), ., sdrank`y') if year==`y'
* 1970
local y 1970
egen sdrank`y' = rowsd(zpri0 zpri10 zpri20 zpri30 zpri40) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10, zpri20, zpri30, zpri40), ., sdrank`y') if year==`y'
* 1980
local y 1980
egen sdrank`y' = rowsd(zpri0 zpri10 zpri20 zpri30) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10, zpri20, zpri30), ., sdrank`y') if year==`y'
* 1990
local y 1990
egen sdrank`y' = rowsd(zpri0 zpri10 zpri20) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10, zpri20), ., sdrank`y') if year==`y'
* 2000
local y 2000
egen sdrank`y' = rowsd(zpri0 zpri10) if year==`y'
replace sdrank`y' = cond(mi(zpri0, zpri10), ., sdrank`y') if year==`y'

gen varrank = .
gen sdrank = .
foreach yy in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
replace sdrank = sdrank`yy' if year==`yy'
replace varrank = sdrank`yy'*sdrank`yy' if year==`yy'
}

replace sdrank = sdrank*100
replace varrank = varrank*100 /* rescale */
compress

* MA-YEAR CONTROLS
* Change in metro pop, area
gen y2010 = year==2010
foreach yy in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
gen y`yy' = year==`yy'
foreach var in pop area {
gen foo`var'`yy' = ma_`var' * y`yy'
gen foo`var'2010 = ma_`var' * y2010
bysort mashort: egen m`var'`yy' = max(foo`var'`yy')
bysort mashort: egen m`var'2010 = max(foo`var'2010)
gen dm`var'`yy' = ln(m`var'2010) - ln(m`var'`yy')
drop foo* m`var'2010
}
drop y`yy'
}

* Variance in neighborhood income, by ma-year
gen w = (tr_hh_avginc)
replace w = (tr_hh_avgocs) if year==1880 & w==.		/* use occ score, educ, rent when inc is n/a/ */
replace w = tr_pop_shlit if year==1910 | year==1920 & w==.
replace w = (tr_hu_avgrnt) if year==1930 | year==1940 & w==.
gen lnw = ln(w)
bysort mayear: egen sd_income = sd(w)
replace sd_income = sd_income/1000

* Variance in housing age, by ma-year
bysort mayear: egen sd_huage = sd(tr_hu_avgage)

* VARIANCE OF NATURAL AMENITY WITHIN MSA 
* Calculate variance in natural amenity, by ma-year
* Including dummies for ma/natural amenity
* This is variable of interest
* Shore
foreach feat in shore {
	gen lnd`feat' = ln(d2`feat')
	bysort mayear: egen sd_`feat' = sd(lnd`feat')
	gen var_`feat' = sd_`feat' * sd_`feat'
}
bysort mayear: egen mindshore = min(d2shore)
qui sum mindshore if year==2010 & mytag,d
* hedonic value
forvalues n = 8/8 {
	bysort mayear: egen sd_ahat`n' = sd(ahat`n')
	gen var_ahat`n' = sd_ahat`n'^2
	qui sum var_ahat`n' if mytag
	gen ma_ahat`n' = var_ahat`n'>r(mean)
}
* Top/bottom deciles of income for first level regressions
xtile prix = pri, nq(10)
replace prix = 0 if prix >= 2 & prix <= 9

* Regressions. Level 1.
* Only need to run this code once
if 1 {
xi i.cbsa, noomit
foreach y in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
mat drop _all
* sd
qui reg sdrank _Icbsa* if year==`y', noconstant vce(robust)
	mat tempb1=e(b)
	mat tempV1=e(V)
* control nonlinearly for pri deciles
qui reg sdrank ib0.prix _Icbsa* if year==`y', noconstant vce(robust)
	mat tempb3=e(b)
	mat tempV3=e(V)

mat stack = (0,0,0,0,0)
mat colnames stack = cbsa b`y' V`y' z3`y' zV3`y'

qui levelsof cbsa, local(cbsalevels)
foreach ll of local cbsalevels {
	mat outcome = (`ll',tempb1[1,"_Icbsa_`ll'"], tempV1["_Icbsa_`ll'"," _Icbsa_`ll'"], tempb3[1,"_Icbsa_`ll'"], tempV3["_Icbsa_`ll'"," _Icbsa_`ll'"])
	mat stack = stack\outcome
}

preserve
clear
svmat stack, names(col)
replace b`y'=. if b`y' == 0
replace V`y'=. if V`y' == 0
replace z3`y'=. if z3`y' == 0
replace zV3`y'=. if zV3`y' == 0
drop if cbsa==0
sort cbsa
save $results/table5/stack`y'.dta, replace
restore
}
drop _Icbsa_*
}

foreach y in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
sort cbsa
merge m:1 cbsa using $results/table5/stack`y'.dta, nogenerate
}

foreach y in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
gen wt`y' = 1/V`y'
gen z3wt`y' = 1/zV3`y'
}	

local ctrlvars "dmpop1960 dmarea1960"
local ctrlvarsplus "dmpop1960 dmarea1960 sd_income sd_huage"
local feat "shore"
capture drop nh
gen nh = ma_`feat'
foreach y in 1960 {
qui reg b`y' nh		 		    [w=wt`y'] if mytag & year==`y', vce(robust)
est sto ma1
qui reg b`y' nh `ctrlvars'	    [w=wt`y'] if mytag & year==`y', vce(robust)
est sto ma2
qui reg z3`y' nh `ctrlvarsplus'  [w=z3wt`y'] if mytag & year==`y', vce(robust)
est sto ma3
replace nh = sd_`feat'
qui reg z3`y' nh `ctrlvarsplus'  [w=z3wt`y'] if mytag & year==`y', vce(robust)
est sto sd3
replace nh = sd_ahat8
qui reg z3`y' nh `ctrlvarsplus'  [w=z3wt`y'] if mytag & year==`y', vce(robust)
est sto sd4
}

* Table 5. Persistence in metros with variation in coastal proximity, 1960–2010
local reglist "ma1 ma2 ma3 sd3 sd4"
estimates table `reglist', b(%9.3f) se(%9.3f) p(%9.3f) stats(N r2) keep(nh `ctrlvarsplus')
foreach var in `ctrlvarsplus' {
qui sum `var' if mytag & year==1960
local m`var' = trim("`: display %9.2f r(mean)'") 
local sd`var' = trim("`: display %9.2f r(sd)'")
}
estout `reglist' using $results/table5/table5.txt, replace ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tex) stats(r2 N, fmt(%9.3f %9.0g) labels("[0.5em]`=char(13)'\$R^2\$" "Metro areas")) ///
keep(nh dmpop1960 dmarea1960 sd_income sd_huage) varwidth(23) ///
varlabels(nh "[0.5em]`=char(13)'Variation in coastal proximity" ///
dmpop1960 "[0.5em]`=char(13)'Metro log change in population, 1960--2010" ///
dmarea1960 "[0.5em]`=char(13)'Metro log change in land area, 1960--2010" ///
sd_income "[0.5em]`=char(13)'Within-metro s.d. in nbhd. income (thous.)" ///
sd_huage "[0.5em]`=char(13)'Within-metro s.d. in nbhd. avg. house age") ///
labcol2(" " "`mdmpop1960' [`sddmpop1960']" "`mdmarea1960' [`sddmarea1960']" "`msd_income' [`sdsd_income']" "`msd_huage' [`sdsd_huage']", title("\$\mu\$ [\$\sigma\$]")) ///
numbers  ///
prehead(\begin{tabular}{l*{@M}{c}c}  \toprule\\) ///
postfoot([0.5em]  \bottomrule \end{tabular})

estout `reglist' using $results/table5/table5.tab, replace ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tab) stats(r2 N, fmt(%9.3f %9.0g) labels("[0.5em]`=char(13)'\$R^2\$" "Metro areas")) ///
keep(nh dmpop1960 dmarea1960 sd_income sd_huage) varwidth(23) ///
varlabels(nh "[0.5em]`=char(13)'Variation in coastal proximity" ///
dmpop1960 "[0.5em]`=char(13)'Metro log change in population, 1960--2010" ///
dmarea1960 "[0.5em]`=char(13)'Metro log change in land area, 1960--2010" ///
sd_income "[0.5em]`=char(13)'Within-metro s.d. in nbhd. income (thous.)" ///
sd_huage "[0.5em]`=char(13)'Within-metro s.d. in nbhd. avg. house age") ///
labcol2(" " "`mdmpop1960' [`sddmpop1960']" "`mdmarea1960' [`sddmarea1960']" "`msd_income' [`sdsd_income']" "`msd_huage' [`sdsd_huage']", title("\$\mu\$ [\$\sigma\$]")) ///
numbers

log using $results/table5/stats.log, replace
sum ma_shore sd_shore sd_ahat if mytag & year==1960 & e(sample)
qui areg sdrank if year==1960, abs(mayear)
sum sdrank if  e(sample)
sum b1960 if year==1960 & mytag
log close

* Figure 5. Persistence: Neighborhood volatility and metro growth
sum b1960 if mytag & year==1960, d
gen outlier = year==1960 & inlist(mashort,"Merced CA", "Stockton CA", "Altoona PA")
gen mv = 10
replace mv = 5 if inlist(mashort, "Altoona PA")
replace mv = 1 if inlist(mashort, "Stockton CA")
twoway (scatter dmpop1960 dmarea1960 if year==1960 & mytag & b1960 <`r(p25)', m(X) mcolor(red)) ///
       (scatter dmpop1960 dmarea1960 if year==1960 & mytag & (b1960 >=`r(p25)' & b1960<`r(p50)'), m(o) mcolor(gs12)) ///
	   (scatter dmpop1960 dmarea1960 if year==1960 & mytag & (b1960 >=`r(p50)' & b1960<`r(p75)'), m(o) mcolor(gs12)) ///
	   (scatter dmpop1960 dmarea1960 if year==1960 & mytag & (b1960 >=`r(p75)'), m(+) mcolor(green)) ///
	   (scatter dmpop1960 dmarea1960 if outlier & mytag & year==1960, m(i) mlabel(mashort) mlabsize(small)  mlabvpos(mv) ///
	   xlabel(,labsize(small)) ylabel(,labsize(small)) ///
	   ytitle("Log change in metro population 1960-2010" " ", size(small)) xtitle(" " "Log change in metro land area 1960-2010", size(small)) ///
	   legend(pos(10) ring(0) region(lstyle(none)) size(small) order(1 4 2) c(1) label(1 "Average neighborhood volatility < 25th %ile") label(2 "Other metropolitan areas") label(4 "Average neighborhood volatility > 75th %ile")))
graph export $results/figure5/figure5.pdf, replace
graph export $results/figure5/figure5.eps, replace

	   
* Table 6. Persistence: Robustness to other years and measures of within-metro natural heterogeneity
capture mkdir $results/table6
lab var sd_shore "Within-metro s.d. in log nbhd. distance to coast"
lab var sd_ahat8 "Within-metro s.d. in nbhd. natural value"

capture log close
log using $results/table6/stats.log,  replace
log close

* metro indicator for shore
foreach y in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
qui reg b`y' ma_shore dmpop`y' dmarea`y' [w=wt`y'] if mytag & year==`y', vce(robust)
est sto r`y'
} 
qui sum ma_shore if mytag & year==2010
local m = trim("`: display %9.2f r(mean)'") 
local sd = trim("`: display %9.2f r(sd)'")
estout r1880 r1930 r1940 r1950 r1960 r1970 r1980 r1990 r2000 using $results/table6/table6.txt, replace ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tex) stats(r2 N, fmt(%9.3f %9.3f %9.0g) labels(\$R^2\$)) ///
labcol2("`m' [`sd']", title("\$\mu\$ [\$\sigma\$]")) ///
keep(ma_shore) 

estout r1880 r1930 r1940 r1950 r1960 r1970 r1980 r1990 r2000 using $results/table6/table6.tab, replace ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tab) stats(r2 N, fmt(%9.3f %9.3f %9.0g) labels(\$R^2\$)) ///
labcol2("`m' [`sd']", title("\$\mu\$ [\$\sigma\$]")) ///
keep(ma_shore) 

* SD shore and SD ahat
foreach feat in shore ahat8 {
foreach y in 1880 1930 1940 1950 1960 1970 1980 1990 2000 {
qui reg b`y' sd_`feat' dmpop`y' dmarea`y' [w=wt`y'] if mytag & year==`y', vce(robust)
est sto s`y'
log using $results/table6/stats.log,  append
di `y'
sum sdrank if e(sample)
log close
}

qui sum sd_`feat' if mytag & year==2010
local m = trim("`: display %9.2f r(mean)'") 
local sd = trim("`: display %9.2f r(sd)'")
estout s1880 s1930 s1940 s1950 s1960 s1970 s1980 s1990 s2000 using $results/table6/table6.txt, append ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tex) stats(r2 N, fmt(%9.3f %9.3f %9.0g) labels(\$R^2\$)) ///
labcol2("`m' [`sd']", title("\$\mu\$ [\$\sigma\$]")) ///
keep(sd_`feat') 

estout s1880 s1930 s1940 s1950 s1960 s1970 s1980 s1990 s2000 using $results/table6/table6.tab, append ///
label wrap ///
cells(b(fmt(3) star) se(fmt(3) par)) starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
style(tab) stats(r2 N, fmt(%9.3f %9.3f %9.0g) labels(\$R^2\$)) ///
labcol2("`m' [`sd']", title("\$\mu\$ [\$\sigma\$]")) ///
keep(sd_`feat') 
}

* Figure 6. Neighborhoods in naturally heterogeneous cities experience smaller over-time fluctuations in income
gen mashorter = substr(mashort, 1, length(mashort)-3)

capture drop foo 
capture drop foo2
qui reg b1960 dmpop1960 dmarea1960 [w=wt1960] if mytag & year==1960
predict foo, resid
local y = 1960
local feat = "ahat8"
reg foo sd_`feat' if mytag & year==1960
capture predict foo2 if e(sample), resid
qui sum sd_`feat', d
local p1y  `r(p1)'
local p99y = `r(p99)'
twoway (lfit foo sd_`feat' if mytag & year==`y' [w=z3wt1960], lcolor(midgreen)) ///
   (scatter foo sd_`feat' if mytag & year==`y' & (ma_pop>1000000 | foo2>3 | foo2<-3 | sd_`feat'<`p1y' | sd_`feat'>`p99y'), ///
	m(oh) msize(tiny) mlabel(mashorter) mlabsize(vsmall) mlabcolor(gs10) mcolor(gs10) ///
	xtitle(" " "Within-city SD in natural value, `y'", size(small)) ///
	ytitle("Residual over-time SD in neighborhood income, `y'-2010" " ", size(small)) ///
	legend(off) xscale(r(0 .22)) ///
	xlabel(,format(%9.2f) labsize(small)) ylabel(,labsize(small)) ///
	) ///
   (scatter foo sd_`feat' if mytag & year==`y', m(oh) msize(tiny) mcolor(gs10))
graph export $results/figure6/figure6.eps, replace
graph export $results/figure6/figure6.pdf, replace
drop foo foo2
}	
*

* Section 6: Decentralization results
if `suburb' {
capture mkdir "$results/table7"
capture mkdir "$results/figure7"
capture mkdir "$results/appendix/figureb1"

* Define variables
cap drop i_cbd
gen i_cbd = d2cbdkm < 5 if d2cbdkm!=.

gen dcbdjobsh3 = sharea06-sharea98
gen dcbdjobsh10 = shareb06-shareb98
gen dcbdjobsh10p = sharec06-sharec98
gen i_cbdxjobdc1 = i_cbd*dcbdjobsh3
gen i_cbdxjobdc2 = i_cbd*dcbdjobsh10
gen i_cbdxjobdc3 = i_cbd*dcbdjobsh10p


* Statistics reported in footnote in Section 6
bysort mayear: egen maxd = max(d2cbd)
log using $results/text/city_extent.log, replace
sum maxd if mytag & year==1880, d
count if mytag & year==1880 & maxd>15000
log close

* Statistic reported in section 4.1:  Average rate of decline of central city neighborhoods, 1950-1980
log using $results/text/downtown_decline.log, replace
sum dpri if i_cbd==1 & year>=1950 & year<=1980
log close

* Define interaction terms.
cap drop i_cbdxshore
gen i_cbdxshore = i_cbd*ma_shore
gen lndcbdxshore = lndcbd*ma_shore

cap drop i_cbdxahat
bysort mayear: egen sd_ahat = sd(ahat)
gen i_cbdxahat = i_cbd*sd_ahat

qui areg dpri pri ahat8, absorb(mayear)
qui sum ahat8 if e(sample)
capture gen dm_ahat = ahat8 - `r(mean)'
qui sum sd_ahat if e(sample) & mytag
gen std_sd_ahat = (sd_ahat - `r(mean)')/`r(sd)'
gen ahatXsd_ahat = dm_ahat * std_sd_ahat
gen i_ahatXsd_ahat = i_ahat * std_sd_ahat

* Regression reported in section 6 text
log using $results/text/equation11_estimates.log, replace
areg dpri i_ahat ahatXsd_ahat pri lndport lndens lnhuage lndcbd, absorb (mayear) vce(cluster mayear)
est sto aXsd1
log close

* Regressions
qui areg dpri i_cbd i_cbdxshore, absorb(mayear) vce(cluster mayear) 
est sto cbd1

qui areg dpri i_cbd i_cbdxshore pri, absorb(mayear) vce(cluster mayear) 
est sto cbd2

qui areg dpri i_cbd i_cbdxshore pri lndport lndens         if year>=1910 & year<2010, absorb(mayear) vce(cluster mayear) 
est sto cbd3

qui areg dpri i_cbd i_cbdxshore pri lndport lndens lnhuage if year>=1950 & year<1980, absorb(mayear) vce(cluster mayear) 
est sto cbd4

qui areg dpri i_cbd i_cbdxshore pri lndport lndens lnhuage if year>=1980 & year<2010, absorb(mayear) vce(cluster mayear) 
est sto cbd5

qui areg dpri i_cbd i_cbdxshore pri lndport lndens lnhuage if year>=1990 & i_cbdxjobdc1!=., absorb(mayear) vce(cluster mayear) 
est sto cbd6

qui areg dpri i_cbd i_cbdxshore pri lndport lndens lnhuage i_cbdxjobdc1 i_cbdxjobdc2 if year>=1990, absorb(mayear) vce(cluster mayear) 
est sto cbd7

local reglist "cbd1 cbd2 cbd3 cbd4 cbd5 cbd6 cbd7"
local varlist "i_cbd i_cbdxshore pri lndport lndens lnhuage i_cbdxjobdc1 i_cbdxjobdc2"
log using $results/table7/table7.log, append
est table `reglist',  b(%9.3f) se(%9.3f) p(%9.3f)  stats(r2 rmse N N_clust) keep(`varlist')
log close

* Table 7
estout `reglist' using $results/table7/table7.txt, replace ///
	cells(b(fmt(3) star) se(fmt(3) par)) ///
	starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tex) ///
	stats(r2 N, fmt(%9.3f %9.0gc) labels("\$R^2\$" "Neighborhoods")) ///
	keep(`varlist') wrap varwidth(23) ///
	varlabels(i_cbd "[1em]`=char(13)' 1(CBD)\$^*\$" ///
	i_cbdxshore "[1em]`=char(13)'1(CBD) \$\times\$ 1(Coastal metro)\$^\dag\$" ///
	pri "[1em]`=char(13)'Initial \%ile rank by income \$(r_{i,t})\$" ///
	lndport "[1em]`=char(13)'Log distance to seaport" ///
	lndens "[1em]`=char(13)'Log population density" ///
	lnhuage "[1em]`=char(13)'Log average house age\$^{\ddagger}\$" ///
	i_cbdxjobdc1 "[1em]`=char(13)' 1(CBD) \$\times\$ \$\Delta\$ CBD job share\$^{\mathsection}\$" ///
	i_cbdxjobdc2 "[1em]`=char(13)' 1(CBD) \$\times\$ \$\Delta\$ 3--10mijob share\$^{\||}\$") ///
	numbers ///
	prehead(\begin{tabular}{l*{@M}{c}c}  \hline\hline\\) ///
	postfoot([1em]  \hline\hline \end{tabular})

	estout `reglist' using $results/table7/table7.tab, replace ///
	cells(b(fmt(3) star) se(fmt(3) par)) ///
	starlevels(\$^a\$ 0.10 \$^b\$ 0.05 \$^c\$ 0.01) ///
	style(tab) ///
	stats(r2 N, fmt(%9.3f %9.0gc) labels("\$R^2\$" "Neighborhoods")) ///
	keep(`varlist') wrap varwidth(23) ///
	varlabels(i_cbd "[1em]`=char(13)' 1(CBD)\$^*\$" ///
	i_cbdxshore "[1em]`=char(13)'1(CBD) \$\times\$ 1(Coastal metro)\$^\dag\$" ///
	pri "[1em]`=char(13)'Initial \%ile rank by income \$(r_{i,t})\$" ///
	lndport "[1em]`=char(13)'Log distance to seaport" ///
	lndens "[1em]`=char(13)'Log population density" ///
	lnhuage "[1em]`=char(13)'Log average house age\$^{\ddagger}\$" ///
	i_cbdxjobdc1 "[1em]`=char(13)' 1(CBD) \$\times\$ \$\Delta\$ CBD job share\$^{\mathsection}\$" ///
	i_cbdxjobdc2 "[1em]`=char(13)' 1(CBD) \$\times\$ \$\Delta\$ 3--10mijob share\$^{\||}\$") ///
	numbers 



* Figure 7
foreach yy in 1880 1970 {
twoway (lowess pri d2cbdkm if ma_shore==1 & year==`yy' & d2cbd<15000, lcolor(gs5)) ///
(lowess pri d2cbdkm if ma_shore==0 & year==`yy' & d2cbd<15000, lcolor(gs0) lpattern(--) ///
title("`yy'" " ", size(medsmall)) ///
xtitle("") ///
ytitle("") ///
xsize(2) ysize(2) `legend' ///
xlabel(,format(%9.0f) labsize(vsmall)) ylabel(0(0.25)1,format(%9.2f) labsize(vsmall)) ///
yscale(r(0 1)) ///			 
legend(off) ///
)
graph save $results/figure7/`yy'.gph, replace
}

foreach yy in 1930 1940 1950 1960 1980 1990 2000 {
twoway (lowess pri d2cbdkm if ma_shore==1 & year==`yy' & d2cbd<15000, lcolor(gs5)) ///
(lowess pri d2cbdkm if ma_shore==0 & year==`yy' & d2cbd<15000, lcolor(gs0) lpattern(--) ///
title("`yy'" " ", size(medsmall)) ///
xtitle("") ///
ytitle("") ///
xsize(2) ysize(2) `legend' ///
xlabel(,format(%9.0f) labsize(vsmall)) ylabel(none) ///
yscale(r(0 1)) ///			 
legend(off) ///
)
graph save $results/figure7/`yy'.gph, replace
}
*

local yy = 2010
twoway (lowess pri d2cbdkm if ma_shore==1 & year==`yy' & d2cbd<15000, lcolor(gs5)) ///
(lowess pri d2cbdkm if ma_shore==0 & year==`yy' & d2cbd<15000, lcolor(gs0) lpattern(--) ///
legend(label(1 "Coastal cities") label(2 "Interior cities") size(vsmall)) ///
title("`yy'" " ", size(medsmall)) ///
xtitle("") ///
ytitle("") ///
xsize(3) ysize(4) `legend' ///
xlabel(,format(%9.0f) labsize(vsmall)) ylabel(none) ///
yscale(r(0 1)) ///			 
legend(region(lstyle(none)) ring(0) pos(12) rows(2)) /// 
	 )
graph save $results/figure7/`yy'.gph, replace


graph combine $results/figure7/1880.gph $results/figure7/1930.gph ///
$results/figure7/1940.gph $results/figure7/1950.gph ///
$results/figure7/1960.gph $results/figure7/1970.gph ///
$results/figure7/1980.gph $results/figure7/1990.gph ///
$results/figure7/2000.gph ///
$results/figure7/2010.gph ///
,ycommon  rows(2) xsize(9) ysize(4.5) ///
iscale(0.8) imargin(tiny) ///
l1title("Percentile rank" " ", size(vsmall)) ///
b1title(" " "Distance to central business district, km", size(vsmall))
graph export $results/figure7/figure7.pdf, replace
graph export $results/figure7/figure7.eps, replace

* Figure B1
preserve
capture drop ma_1880 d1880
gen d1880 = year==1880 & pri!=.
bysort cbsa: egen ma_1880 = max(d1880)
bysort mashort year: egen n_pri = count(pri)
replace mashort = "Louisville KY" if mashort=="Louisville/Jefferson County KY"
replace mashort = "San Francisco CA" if mashort=="San Jose CA"
lab var d2cbdkm "Distance to central business district, km"
lab var ma_shore "interior (0) or coastal (1)"
lab var mashort "metropolitan area"
drop if mashort=="Minneapolis MN" & year==1880 & d2cbd<15000 & d2cbdkm>4
/* One errant observation close to St Paul */
drop if mashort=="San Francisco CA" & year==1880 & d2cbd<15000 & d2cbdkm>10
/* A few Oakland tracts */

twoway lowess pri d2cbdkm if ma_1880 & year==1880 & d2cbd<15000 & n_pri>20, ///
bw(0.9) adjust lcolor(gs6)  xlabel(0(5)15) ylabel(#3,nogrid) xscale(r(0 15)) ///
by(ma_shore mashort, holes(10) style(default) imargin(zero) ///
l1title("Percentile rank in 1880" " ", size(quarter_tiny)) /// 
b1title(" " "Distance to central business district, km", size(quarter_tiny)) ///
note(" "))
graph export "$results/appendix/figureb1.pdf", replace
graph export "$results/appendix/figureb1.eps", replace
restore
}


